/*******************************************************************************
Paper title: Does early nutrition predict cognitive skills during later childhood?
Evidence from two developing countries
Alan Sanchez, Marta Favara, Margaret Sheridan, Jere Behrman.
Created: 8 Oct 2020
This version: 4 Dec 2023
Structure: Estimations use adjusted HAZ/stunting and HFE methods
Content: Table 3 (panel B), Table 6 (panel B), Table C2 (panel B)
*******************************************************************************/
			
clear all

* Name of the person running the file; it selects the machine-specific paths below.
local user Alan
			
if "`user'"=="Alan" {
global outdata	C:\Users\alans\Dropbox\_NIHR21_Proposal\NIH Papers\Paper 1_nutrition&EF\paper\WD\Replication files\Data
global output   C:\Users\alans\Dropbox\_NIHR21_Proposal\NIH Papers\Paper 1_nutrition&EF\paper\WD\Replication files\Output
}
else {
	* Stop here instead of running with empty (or stale) path globals:
	* "clear all" does not drop global macros left over from a previous session.
	display as error "Unknown user `user': define the globals outdata and output for this machine before running."
	exit 198
}

* Control variables shared by all specifications
global control0  chage_r4 chage_r4_2 female dm_educ2 dm_educ3 dm_educ4 urban_c /*singleparent_r1*/  hhsize_c
* Maternal-language dummies: country1 is used in the country==1 regressions,
* country2 in the country==2 regressions
global country1  mom_spanish
global country2  mom_oromifa mom_tigrina mom_other

* Wealth-index quintile dummies
global control2q wi_cq2 wi_cq3 wi_cq4 wi_cq5

* Quintile dummies for educexp_all_r2 and nfoodexp_r2
global control3q educexp_all_r2q2 educexp_all_r2q3 educexp_all_r2q4 educexp_all_r2q5
global control4q nfoodexp_r2q2 nfoodexp_r2q3 nfoodexp_r2q4 nfoodexp_r2q5
	
* Outcome variable of each task
global task3 	itask3
global task2 	itask2_a
global task1 	task1
global task4 	itask4	

* Task-specific "b" variable, used as a control in Tables 3B and C2B
* and as the outcome in Table 6B
global btask3 	bitask3
global btask2 	bitask2_a
global btask1 	btask1
global btask4 	bitask4	

* Controls included in every task regression
global alltask 	hr2 hr3 wkend /*practices*/
		
* Sibling-type dummies (created further down in this file)
global cluster_r1 sib_y sib_o /*dclustid_r11- dclustid_r120*/
		
* Estimation sample: height-for-age z-score within [-6, 6]
* NOTE(review): sample2 duplicates sample1 and is not referenced in the visible part of this file
global sample1  zhfa_c<=6 & zhfa_c>=-6
global sample2  zhfa_c<=6 & zhfa_c>=-6

global absorb   childid
	
*---------------------------------------------------------------------------------------------------------------------*---------------------------------------------------------------------------------------------------------------------*------------------------------------------------------------------------------------------------------------------*/

* Load the child-level file and attach the six PPVT files one after another.
* Every file is matched on (childid, sibling) with the old-style merge syntax;
* the data in memory are sorted on the keys before each merge, the match result
* is tabulated, and rows that exist only in the using file (_merge==2) are discarded.
use "$outdata\pe_et_childlevel_230821", clear

foreach ppvtfile in "$outdata\et_ppvt_r2"  "$outdata\pe_ppvt_r2"  ///
                    "$outdata\et_ppvt_r3"  "$outdata\pe_ppvt_r3"  ///
                    "$outdata\et_cppvt_r3" "$outdata\pe_cppvt_r3" {
	sort childid sibling
	merge childid sibling using "`ppvtfile'"
	tabulate _merge
	drop if _merge == 2
	drop _merge
}

* ------------------------------------------------------------------------------
* Standardised PPVT scores
* Each score is standardised (egen std) separately within bands given by the
* cut-offs below; the band-specific z-scores are then collected in one variable.
* NOTE(review): the cut-offs (48, 60, 72, ..., 180) are applied to the score
* variable itself although they look like ages in months -- confirm that
* conditioning on the score rather than on an age variable is intended.
* ------------------------------------------------------------------------------

* Round 2 (pe_ppvt_r2, et_ppvt_r2): two bands split at 60
foreach cc in pe et {
	gen  std_`cc'_ppvtr2 = .
	egen std_`cc'_ppvtr2_4 = std(`cc'_ppvt_r2) if `cc'_ppvt_r2 <  60 & `cc'_ppvt_r2 != .
	egen std_`cc'_ppvtr2_5 = std(`cc'_ppvt_r2) if `cc'_ppvt_r2 >= 60 & `cc'_ppvt_r2 != .
	foreach band in 4 5 {
		replace std_`cc'_ppvtr2 = std_`cc'_ppvtr2_`band' if std_`cc'_ppvtr2_`band' != .
	}
}

* Round 3 (pe_ppvt_r3, et_ppvt_r3): bands of width 12.
* Band b covers [12*b, 12*(b+1)); the first band (3) has no lower bound and the
* last band (6 for pe, 15 for et) has no upper bound.
foreach cc in pe et {
	local last = cond("`cc'" == "pe", 6, 15)
	gen std_`cc'_ppvtr3 = .
	forvalues band = 3/`last' {
		local lo = 12 * `band'
		local hi = 12 * (`band' + 1)
		if `band' == 3 {
			local inband `cc'_ppvt_r3 < `hi'
		}
		else if `band' == `last' {
			local inband `cc'_ppvt_r3 >= `lo'
		}
		else {
			local inband `cc'_ppvt_r3 >= `lo' & `cc'_ppvt_r3 < `hi'
		}
		egen std_`cc'_ppvtr3_`band' = std(`cc'_ppvt_r3) if `inband' & `cc'_ppvt_r3 != .
	}
	forvalues band = 3/`last' {
		replace std_`cc'_ppvtr3 = std_`cc'_ppvtr3_`band' if std_`cc'_ppvtr3_`band' != .
	}
}

* Combined score: round-2 z-score for rows with sibling==0, round-3 z-score for
* rows with sibling==1, taken from the pe variables when country==1 and from
* the et variables when country==2
gen std_ppvt_c = .
replace std_ppvt_c = std_pe_ppvtr2 if country == 1 & sibling == 0
replace std_ppvt_c = std_pe_ppvtr3 if country == 1 & sibling == 1
replace std_ppvt_c = std_et_ppvtr2 if country == 2 & sibling == 0
replace std_ppvt_c = std_et_ppvtr3 if country == 2 & sibling == 1

* Round-3 cppvt scores (pe_cppvt_r3, et_cppvt_r3): two bands split at 96
foreach cc in pe et {
	gen  std_`cc'_cppvtr3 = .
	egen std_`cc'_cppvtr3_8 = std(`cc'_cppvt_r3) if `cc'_cppvt_r3 <  96 & `cc'_cppvt_r3 != .
	egen std_`cc'_cppvtr3_9 = std(`cc'_cppvt_r3) if `cc'_cppvt_r3 >= 96 & `cc'_cppvt_r3 != .
	foreach band in 8 9 {
		replace std_`cc'_cppvtr3 = std_`cc'_cppvtr3_`band' if std_`cc'_cppvtr3_`band' != .
	}
}

* Second combined score: ppvtz_r4 for rows with sibling==1 (either country),
* the round-3 cppvt z-score for rows with sibling==0
gen std_ppvt_c2 = .
replace std_ppvt_c2 = ppvtz_r4       if sibling == 1 & inlist(country, 1, 2)
replace std_ppvt_c2 = std_pe_cppvtr3 if sibling == 0 & country == 1
replace std_ppvt_c2 = std_et_cppvtr3 if sibling == 0 & country == 2

* Outliers: z-scores outside [-4, 4] are set to missing
foreach z of varlist std_ppvt_c std_ppvt_c2 ppvtz_r4 {
	replace `z' = . if (`z' < -4 | `z' > 4) & `z' != .
}

*** Required for the pooled-sample estimations: each maternal-language dummy
*** is set to zero for the rows of the other country
replace mom_spanish = 0 if country == 2
foreach lang of varlist mom_oromifa mom_tigrina mom_other {
	replace `lang' = 0 if country == 1
}

*** Cluster IDs must differ between the two countries in the pooled sample,
*** so the country==2 IDs are shifted by 100
replace clustid_r1 = clustid_r1 + 100 if country == 2

global country   mom_spanish mom_oromifa mom_tigrina mom_other

*** Keep only (childid, sibling) groups whose highest task value is 4,
*** i.e. paired siblings who answered all tasks (the original note reports 22 obs dropped)
tempvar toptask
bysort childid sibling: egen `toptask' = max(task)
keep if `toptask' == 4
drop `toptask'
estimates clear

* Panel identifier for the fixed-effects estimator: one group per childid
egen hhgroup = group(childid)
xtset hhgroup

* Sibling-type dummies; both are zero on rows where sibling is not 1
gen sib_y = (sibling == 1 & sibyounger == 1)
gen sib_o = (sibling == 1 & sibyounger == 0)

* Height-for-age z-score and stunting indicator: the r3 measure on rows with
* sibling==1, the r2 measure on rows with sibling==0, missing otherwise
gen zhfa_c     = cond(sibling == 1, zhfa_r3,     cond(sibling == 0, zhfa_r2,     .))
gen stunting_c = cond(sibling == 1, stunting_r3, cond(sibling == 0, stunting_r2, .))

* Interactions of each nutrition measure with sibling type and age
foreach nutvar in zhfa stunting {
	gen `nutvar'xsib_y = `nutvar'_c * sib_y
	gen `nutvar'xsib_o = `nutvar'_c * sib_o
	gen `nutvar'xage   = `nutvar'_c * chage_r4
}

* Interactions with the female dummy
gen zhfaxfem     = zhfa_c * female
gen stuntingxfem = stunting_c * female

* Squared age
gen chage_r4_2 = chage_r4 * chage_r4

* Shorter names for the two euclideandave variables
rename euclideandave_delay_0 euclid_del
rename euclideandave_dot_0   euclid_dot

* Variable labels: stunting and its interactions
label variable stunting_c     "Stunted"
label variable stuntingxsib_y "Stunted x younger sib"
label variable stuntingxsib_o "Stunted x older sib"
label variable stuntingxfem   "Stunted x female"

* Variable labels: height for age and its interactions
label variable zhfa_c     "Height for age"
label variable zhfaxsib_y "Height for age x younger sib"
label variable zhfaxsib_o "Height for age x older sib"
label variable zhfaxfem   "Height for age x female"

* Variable labels: controls
* NOTE(review): urban_r1 and hhsize_r2 are labelled here, while the control list
* uses urban_c and hhsize_c -- confirm which variables should carry these labels
label variable chage_r4    "Age in months, r4"
label variable chage_r4_2  "Age in months squared, r4"
label variable female      "Child is female"
label variable dm_educ2    "Maternal edu: complete primary"
label variable dm_educ3    "Maternal edu: complete secondary"
label variable dm_educ4    "Maternal edu: complete tertiary"
label variable urban_r1    "Urban area, r1"
label variable hhsize_r2   "Household size, r2"
label variable mom_spanish "Maternal native tongue: spanish"
label variable mom_oromifa "Maternal native tongue: oromifah"
label variable mom_tigrina "Maternal native tongue: tigrina"
label variable mom_other   "Maternal native tongue: other"
label variable wi_cq2      "Wealth index quintile 2"
label variable wi_cq3      "Wealth index quintile 3"
label variable wi_cq4      "Wealth index quintile 4"
label variable wi_cq5      "Wealth index quintile 5"

********************************************************************************
*** Adjusted HAZ for older siblings in Ethiopia
********************************************************************************
* zhfa_c is replaced by zhfa_5_p (presumably the prediction supplied by
* lasso_prediction_older.dta -- confirm) on rows with sibling==1, sibyounger==0
* and country==2. Stunting is then re-derived with the HAZ < -2 cut-off and the
* interactions built from zhfa_c / stunting_c are refreshed on the same rows.
sort  country childid sibling

merge country childid sibling using "$outdata\lasso_prediction_older.dta"
* Rows found only in the prediction file are discarded, as in the earlier merges
* of this file; otherwise they would be appended to the estimation data.
drop if _merge==2
drop _merge

replace zhfa_c      =zhfa_5_p               if sibling==1 & sibyounger==0 & country==2
replace zhfaxsib_y  =zhfa_c*sib_y           if sibling==1 & sibyounger==0 & country==2
replace zhfaxsib_o  =zhfa_c*sib_o           if sibling==1 & sibyounger==0 & country==2
replace zhfaxage    =zhfa_c*chage_r4        if sibling==1 & sibyounger==0 & country==2
* The female interaction is refreshed too, so it does not keep the unadjusted HAZ
replace zhfaxfem    =zhfa_c*female          if sibling==1 & sibyounger==0 & country==2

* Stunting before (first tab) and after (second tab) the adjustment
tab     stunting_c                 		    if sibling==1 & sibyounger==0 & country==2
replace stunting_c    =1          		    if zhfa_c<-2  & zhfa_c!=. & sibling==1 & sibyounger==0 & country==2
replace stunting_c    =0          		    if zhfa_c>=-2 & zhfa_c!=. & sibling==1 & sibyounger==0 & country==2
replace stuntingxsib_y=stunting_c*sib_y  	if sibling==1 & sibyounger==0 & country==2
replace stuntingxsib_o=stunting_c*sib_o   	if sibling==1 & sibyounger==0 & country==2
replace stuntingxage  =stunting_c*chage_r4 	if sibling==1 & sibyounger==0 & country==2
replace stuntingxfem  =stunting_c*female   	if sibling==1 & sibyounger==0 & country==2
tab     stunting_c                   		if sibling==1 & sibyounger==0 & country==2

********************************************************************************
*** Adjusted for index children and younger siblings in Peru & Ethiopia
********************************************************************************
* Same procedure with zhfa_5_p1 (presumably supplied by prediction_index_younger.dta
* -- confirm), applied to every row with sibyounger!=0; this condition also
* selects rows where sibyounger is missing.
sort  country childid sibling
merge country childid sibling using "$outdata\prediction_index_younger.dta"
* Discard using-only rows: they would also satisfy sibyounger!=0 below
drop if _merge==2
drop _merge

replace zhfa_c      =zhfa_5_p1              if sibyounger!=0
replace zhfaxsib_y  =zhfa_c*sib_y           if sibyounger!=0
replace zhfaxsib_o  =zhfa_c*sib_o           if sibyounger!=0
replace zhfaxage    =zhfa_c*chage_r4        if sibyounger!=0
replace zhfaxfem    =zhfa_c*female          if sibyounger!=0

tab     stunting_c                 		    if sibyounger!=0
replace stunting_c    =1          		    if zhfa_c<-2  & zhfa_c!=. & sibyounger!=0
replace stunting_c    =0          		    if zhfa_c>=-2 & zhfa_c!=. & sibyounger!=0
replace stuntingxsib_y=stunting_c*sib_y  	if sibyounger!=0
replace stuntingxsib_o=stunting_c*sib_o   	if sibyounger!=0
replace stuntingxage  =stunting_c*chage_r4 	if sibyounger!=0
replace stuntingxfem  =stunting_c*female   	if sibyounger!=0
tab     stunting_c                   		if sibyounger!=0

******************************
*** Results
******************************
* All tables use the fixed-effects estimator (xtreg, fe) with standard errors
* clustered on childid, run on three samples. For each sample the locals below
* hold the suffix of the stored estimate names, the maternal-language controls
* and the estimation condition:
*   1 = pooled (no suffix), 2 = country==1 (suffix p), 3 = country==2 (suffix e)
local sfx_1
local lang_1  $country
local smp_1   $sample1
local sfx_2   p
local lang_2  $country1
local smp_2   $sample1 & country==1
local sfx_3   e
local lang_3  $country2
local smp_3   $sample1 & country==2

******************************
*** Table 3, panel B (main specification)
******************************
global nut stunting_c

* Tasks are estimated in the order 3, 2, 1, 4; the task-3 results are stored
* under the number 5 (reg5, reg5p, reg5e), the others under their task number
forvalues s = 1/3 {
	foreach t in 3 2 1 4 {
		local id = cond(`t' == 3, 5, `t')
		xtreg ${task`t'} $nut $control0 $control2q `lang_`s'' $alltask ${btask`t'}  $cluster_r1 ///
			if `smp_`s'', fe vce(cluster childid)
		estimates store reg`id'`sfx_`s''
	}
}

xml_tab reg5 reg2 reg1 reg4 reg5p reg2p reg1p reg4p reg5e reg2e reg1e reg4e, ///
save("$output\reg_071023_2.xls") replace tstat below sheet("Table_3B") stats(N r2_a)

************************************
*** Table 6, panel B (baseline as outcome)
************************************
* Outcomes are the btask variables of tasks 3 and 2; no btask control is included
forvalues s = 1/3 {
	foreach t in 3 2 {
		local id = cond(`t' == 3, 5, `t')
		xtreg ${btask`t'} $nut $control0 $control2q `lang_`s'' $alltask $cluster_r1 ///
			if `smp_`s'', fe vce(cluster childid)
		estimates store reg`id'`sfx_`s''
	}
}

xml_tab reg5 reg2 reg5p reg2p reg5e reg2e, ///
save("$output\reg_071023_2.xls") append tstat below sheet("Table_6B") stats(N r2_a)

******************************
*** Table C2, panel B (with HAZ instead of stunting)
******************************
global nut zhfa_c

* Same specification as Table 3, panel B, with zhfa_c as the nutrition measure
forvalues s = 1/3 {
	foreach t in 3 2 1 4 {
		local id = cond(`t' == 3, 5, `t')
		xtreg ${task`t'} $nut $control0 $control2q `lang_`s'' $alltask ${btask`t'}  $cluster_r1 ///
			if `smp_`s'', fe vce(cluster childid)
		estimates store reg`id'`sfx_`s''
	}
}

xml_tab reg5 reg2 reg1 reg4 reg5p reg2p reg1p reg4p reg5e reg2e reg1e reg4e, ///
save("$output\reg_071023_2.xls") append tstat below sheet("Table_C2B") stats(N r2_a)
